The objectives of this notebook are:
Visualize how well we could remove technical variability associated with the assay (scATAC or multiome). We should see a high degree of intermixing between the 2 assays.
Plot several markers of doublets: scRNAseq doublets determined by multiome, scrublet predictions, number of features, Chromatin Signature, etc. The broader objective of level 2 is to eliminate most remaining doublets and poor-quality cells, as we will discuss in future notebooks. These visualizations will allow us to explore this. We will also plot their location in the UMAP obtained at level 1.
library(Seurat)
library(tidyverse)
library(reshape2)
library(ggpubr)
# Paths
path_to_obj <- str_c(
here::here("scATAC-seq/results/R_objects/level_3/"),
params$cell_type,
"/",
params$cell_type,
"_integrated_level_3.rds",
sep = ""
)
path_to_obj_RNA <- str_c(
here::here("scRNA-seq/3-clustering/3-level_3/"),
params$cell_type,
"_clustered_level_3_with_pre_freeze.rds",
sep = ""
)
path_to_doublets <- here::here("scRNA-seq/3-clustering/2-level_2/tmp/doublets_multiome_df_all.rds")
# Functions
source(here::here("scRNA-seq/bin/utils.R"))
# Colors
color_palette <- c("black", "gray", "red", "yellow", "violet", "green4",
"blue", "chocolate1", "coral2", "blueviolet",
"brown1", "darkmagenta", "deepskyblue1", "dimgray",
"deeppink1", "green", "lightgray", "hotpink1",
"indianred4", "khaki", "mediumorchid2", "gold", "gray")
# Seurat object
seurat <- readRDS(path_to_obj)
seurat
## An object of class Seurat
## 270784 features across 3395 samples within 1 assay
## Active assay: peaks_macs (270784 features, 107717 variable features)
## 3 dimensional reductions calculated: umap, lsi, harmony
seurat_RNA <- readRDS(path_to_obj_RNA)
seurat_RNA
## An object of class Seurat
## 37378 features across 10998 samples within 1 assay
## Active assay: RNA (37378 features, 0 variable features)
## 3 dimensional reductions calculated: pca, umap, harmony
tonsil_RNA_annotation <- seurat_RNA@meta.data %>%
rownames_to_column(var = "cell_barcode") %>%
dplyr::filter(assay == "multiome") %>%
dplyr::select("cell_barcode", "seurat_clusters")
colnames(tonsil_RNA_annotation) <- c("cell_barcode", "seurat_clusters_RNA")
p1 <- DimPlot(seurat,
pt.size = 0.1)
p2 <- DimPlot(seurat_RNA,
group.by = "seurat_clusters",
pt.size = 0.1,cols = color_palette)
p1
p2
p_assay <- plot_split_umap(seurat, var = "assay")
p_assay
Here, we can see the count of doublets detected by cell-type. Note that for the level1 annotation there are not doublets detected in the Naive and Memory B cell cluster.
multiome_doublets <- readRDS(path_to_doublets)
dfm = melt(table(multiome_doublets$cell_type))
dfm$value = as.numeric(as.character(dfm$value))
ggbarplot(dfm, x = "Var1", y = "value",
fill = "Var1",
palette = "jco",
sort.val = "desc",
sort.by.groups = FALSE,
x.text.angle = 90
)
doublets_cells <- colnames(seurat)[which(colnames(seurat) %in% multiome_doublets$barcode)]
length(doublets_cells)
## [1] 0
DimPlot(
seurat, reduction = "umap",
cols.highlight = "darkred", cols= "grey",
cells.highlight= doublets_cells,
pt.size = 0.1
)
## Scrublet prediction
# Scrublet
DimPlot(seurat, group.by = "scrublet_predicted_doublet_atac")
table(seurat$scrublet_predicted_doublet_atac)
##
## FALSE TRUE
## 3167 228
qc_vars <- c(
"nCount_peaks",
"nFeature_peaks",
"nucleosome_signal",
"TSS.enrichment"
)
qc_gg <- purrr::map(qc_vars, function(x) {
p <- FeaturePlot(seurat, features = x, max.cutoff = "q95")
p
})
qc_gg
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
qc_vars <- c(
"annotation_prob")
qc_gg <- purrr::map(qc_vars, function(x) {
p <- FeaturePlot(seurat, features = x)
p
})
qc_gg
## [[1]]
qc_vars <- c("NBC.MBC", "GCBC", "PC", "CD4.T", "Cytotoxic")
qc_gg <- purrr::map(qc_vars, function(x) {
p <- FeaturePlot(seurat, feature = x,
max.cutoff = 4, min.cutoff = -4) +
scale_color_viridis_c(option = "magma")
p
})
qc_gg
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
Annotation level 3 for scATAC will be defined “a priori” as unannotated and the scRNA annotation will be transfered to the scATAC-multiome cells based on the same cell barcode.
cell_testing <- c("NBC_MBC", "GCBC", "PC", "CD4_T","Cytotoxic")
if (unique(seurat$annotation_level_1) %in% cell_testing){
seurat_df <- data.frame(cell_barcode = colnames(seurat)[seurat$assay == "scATAC"])
seurat_df$seurat_clusters_RNA <- "unannotated"
df_all <- rbind(tonsil_RNA_annotation,seurat_df)
rownames(df_all) <- df_all$cell_barcode
df_all <- df_all[colnames(seurat), ]
seurat$seurat_clusters_RNA <- df_all$seurat_clusters_RNA
seurat@meta.data$annotation_prob <- 1
melt(table(seurat$seurat_clusters_RNA))
table(is.na(seurat$seurat_clusters_RNA))
seurat_multiome <- subset(seurat, assay == "multiome")
p3 <- DimPlot(seurat_multiome, pt.size = 0.7, group.by = "seurat_clusters_RNA",cols = color_palette)
p3
}
p2 + p3
umap_level_1 <- FeatureScatter(
seurat,
"UMAP_1_level_1",
"UMAP_2_level_1",
group.by = "annotation_level_1"
)
umap_level_1 <- umap_level_1 +
theme(
#legend.position = "none",
plot.title = element_blank()
)
umap_level_1
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS/LAPACK: /Users/pauli/opt/anaconda3/envs/Tonsil_atlas/lib/libopenblasp-r0.3.10.dylib
##
## locale:
## [1] es_ES.UTF-8/UTF-8/es_ES.UTF-8/C/es_ES.UTF-8/es_ES.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] Signac_1.1.0.9000 ggpubr_0.4.0 reshape2_1.4.4 forcats_0.5.0 stringr_1.4.0 dplyr_1.0.2 purrr_0.3.4 readr_1.4.0 tidyr_1.1.2 tibble_3.0.4 ggplot2_3.3.2 tidyverse_1.3.0 Seurat_3.9.9.9010 BiocStyle_2.16.1
##
## loaded via a namespace (and not attached):
## [1] rappdirs_0.3.1 SnowballC_0.7.0 rtracklayer_1.48.0 GGally_2.0.0 bit64_4.0.5 knitr_1.30 irlba_2.3.3 DelayedArray_0.14.0 data.table_1.13.2 rpart_4.1-15 RCurl_1.98-1.2 AnnotationFilter_1.12.0 generics_0.1.0 BiocGenerics_0.34.0 GenomicFeatures_1.40.1 cowplot_1.1.0 RSQLite_2.2.1 RANN_2.6.1 future_1.20.1 bit_4.0.4 spatstat.data_2.1-0 xml2_1.3.2 lubridate_1.7.9 httpuv_1.5.4 ggsci_2.9 SummarizedExperiment_1.18.1 assertthat_0.2.1 xfun_0.18 hms_0.5.3 evaluate_0.14 promises_1.1.1 fansi_0.4.1 progress_1.2.2 dbplyr_1.4.4 readxl_1.3.1 igraph_1.2.6 DBI_1.1.0 htmlwidgets_1.5.2 reshape_0.8.8 stats4_4.0.3 ellipsis_0.3.1 backports_1.2.0
## [43] bookdown_0.21 biomaRt_2.44.4 deldir_0.2-3 vctrs_0.3.4 Biobase_2.48.0 here_1.0.1 ensembldb_2.12.1 ROCR_1.0-11 abind_1.4-5 withr_2.3.0 ggforce_0.3.2 BSgenome_1.56.0 checkmate_2.0.0 sctransform_0.3.1 GenomicAlignments_1.24.0 prettyunits_1.1.1 goftest_1.2-2 cluster_2.1.0 lazyeval_0.2.2 crayon_1.3.4 labeling_0.4.2 pkgconfig_2.0.3 tweenr_1.0.1 GenomeInfoDb_1.24.0 nlme_3.1-150 ProtGenerics_1.20.0 nnet_7.3-14 rlang_0.4.11 globals_0.13.1 lifecycle_0.2.0 miniUI_0.1.1.1 BiocFileCache_1.12.1 modelr_0.1.8 rsvd_1.0.3 dichromat_2.0-0 cellranger_1.1.0 rprojroot_2.0.2 polyclip_1.10-0 matrixStats_0.57.0 lmtest_0.9-38 graph_1.66.0 ggseqlogo_0.1
## [85] Matrix_1.3-2 carData_3.0-4 zoo_1.8-8 reprex_0.3.0 base64enc_0.1-3 ggridges_0.5.2 png_0.1-7 viridisLite_0.3.0 bitops_1.0-6 KernSmooth_2.23-17 Biostrings_2.56.0 blob_1.2.1 parallelly_1.21.0 jpeg_0.1-8.1 rstatix_0.6.0 S4Vectors_0.26.0 ggsignif_0.6.0 scales_1.1.1 memoise_1.1.0 magrittr_1.5 plyr_1.8.6 ica_1.0-2 zlibbioc_1.34.0 compiler_4.0.3 RColorBrewer_1.1-2 fitdistrplus_1.1-1 Rsamtools_2.4.0 cli_2.1.0 XVector_0.28.0 listenv_0.8.0 patchwork_1.1.0 pbapply_1.4-3 ps_1.4.0 htmlTable_2.1.0 Formula_1.2-4 MASS_7.3-53 mgcv_1.8-33 tidyselect_1.1.0 stringi_1.5.3 yaml_2.2.1 askpass_1.1 latticeExtra_0.6-29
## [127] ggrepel_0.8.2 grid_4.0.3 VariantAnnotation_1.34.0 fastmatch_1.1-0 tools_4.0.3 future.apply_1.6.0 parallel_4.0.3 rio_0.5.16 rstudioapi_0.12 lsa_0.73.2 foreign_0.8-80 gridExtra_2.3 farver_2.0.3 Rtsne_0.15 digest_0.6.27 BiocManager_1.30.10 shiny_1.5.0 Rcpp_1.0.5 GenomicRanges_1.40.0 car_3.0-10 broom_0.7.2 later_1.1.0.1 RcppAnnoy_0.0.16 OrganismDbi_1.30.0 httr_1.4.2 AnnotationDbi_1.50.3 ggbio_1.36.0 biovizBase_1.36.0 colorspace_2.0-0 rvest_0.3.6 XML_3.99-0.3 fs_1.5.0 tensor_1.5 reticulate_1.18 IRanges_2.22.1 splines_4.0.3 RBGL_1.64.0 uwot_0.1.8.9001 RcppRoll_0.3.0 spatstat.utils_2.1-0 plotly_4.9.2.1 xtable_1.8-4
## [169] jsonlite_1.7.1 spatstat_1.64-1 R6_2.5.0 Hmisc_4.4-1 pillar_1.4.6 htmltools_0.5.1.1 mime_0.9 glue_1.4.2 fastmap_1.0.1 BiocParallel_1.22.0 codetools_0.2-17 lattice_0.20-41 curl_4.3 leiden_0.3.5 zip_2.1.1 openxlsx_4.2.3 openssl_1.4.3 survival_3.2-7 rmarkdown_2.5 munsell_0.5.0 GenomeInfoDbData_1.2.3 haven_2.3.1 gtable_0.3.0